
taskID <- as.integer(Sys.getenv("SLURM_ARRAY_TASK_ID"))

require(Matrix)

#save simulation settings by array number
Np = c(5,10, 15)
N = c(1000,5000,10000)

sim_grid <- expand.grid(Np, N)

pop_params <- sim_grid[as.integer(taskID/100)+1,]

epsilon = c(0.3,0.6, 0.9)
prop = c(0.25,0.5,0.75)

sim_grid <- expand.grid(epsilon, prop)


block_params <- sim_grid[as.integer(taskID/100)+1,]


mle_lambda <- function(rds_network, current_sample,seeds, gamma_prior = NULL){
  #calculate the mle lambda given the current network proposal
  if( is.null(gamma_prior)){
    gamma_prior = c(0,0)
  }
  n <- dim(current_sample)[1]
  components <- Likelihood_components(rds_network = rds_network, proposal = current_sample)
  mle_lambda <- (n - length(seeds) + gamma_prior[1])/(t(components$s) %*% components$w + gamma_prior[2])
  return(mle_lambda)
}

Likelihood_components <- function(rds_network, proposal){
  #Likelihood components
  
  w <- rds_network$times
  u <- rds_network$degrees[rds_network$recruited] - rowSums(proposal)
  #let's start constructing s
  coupon_matrix <- rds_network$final_coupon_matrix
  AC <- proposal %*% coupon_matrix
  lt_AC <- AC * as.numeric(lower.tri(AC, diag = T))
  s <- rowSums(t(lt_AC)) + t(coupon_matrix) %*% u 
  return(list(w = w, u = u, s = s, lt_AC = lt_AC))
}



log_likelihood <- function(rds_network, current_sample, lambda, seeds){
  #log likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)-length(seeds) )* 
                                  components$s[-(1:length(seeds))]
                        )
                        )
  )
  return(log_likelihood)
  
}



new_proposal_flip<- function(rds_network, current_sample){
  #New proposal for hill-climbing to find MLE
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    if(runif(1) < 0.5){
      difference <- current_sample - base
      subtractable_vertices<- which(difference == 1, arr.ind = T)
      num <- dim(subtractable_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- subtractable_vertices[random,]
      random_vertices <- sort(random_vertices)
      if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
         base[random_vertices[1], random_vertices[2]] == 0){
        #subtract the edge
        new_sample[random_vertices[1], random_vertices[2]]<- 0
        new_sample[random_vertices[2], random_vertices[1]] <- 0
        found <- T
        direction <- "subtract"
      } else{
        print("error")
      }
    } else{
      u_ind <- as.numeric(u>0)
      add_matrix <-(1-current_sample) * (u_ind %*% t(u_ind)) * (1- diag(1,size, size))
      add_vertices<- which(add_matrix == 1, arr.ind = T)
      num <- dim(add_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- add_vertices[random,]
      random_vertices <- sort(random_vertices)
      if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
        #add the edge
        new_sample[random_vertices[1], random_vertices[2]] <- 1
        new_sample[random_vertices[2], random_vertices[1]] <- 1
        found <- T
        direction <- "add"
      } else{
        print("error")
      }
    }

  }
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}


#alternative proposal mechanism
new_proposal<- function(rds_network, current_sample){
  #New proposal network for hill-climbing to MLE for G_S
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    random_vertices<- sample(x = 1:size, size = 2, replace = FALSE)
    random_vertices <- sort(random_vertices)
    #if the degree of the two vertices is sufficient and the edge does not exist, then:
    if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
      #add the edge
      new_sample[random_vertices[1], random_vertices[2]] <- 1
      new_sample[random_vertices[2], random_vertices[1]] <- 1
      found <- T
      direction <- "add"
    }
    #if the edge between the two random nodes is present, but is absent from baseline RDS
    if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
       base[random_vertices[1], random_vertices[2]] == 0){
      #subtract the edge
      new_sample[random_vertices[1], random_vertices[2]]<- 0
      new_sample[random_vertices[2], random_vertices[1]] <- 0
      found <- T
      direction <- "subtract"
    }
  }
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}



regularized_like_N <- function(alpha, beta, c, N, G_s, degrees, n){
  #generate the regularized mle for N
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  first_comp <- sum(lchoose(N-1:n,d_u)) 
  second_comp <- lbeta(sum(d_u) + alpha, 
                       n*N - choose(n+1, 2) - sum(d_u) + beta)
  third_comp <- -c*log(N)
  
  return(first_comp + second_comp + third_comp)
}

regularized_mle_N <- function(alpha, beta, c, G_s, degrees, n){
  #maximize the regularized likelihood
  
  N <- n*2
  higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
  lower_like <- regularized_like_N(alpha, beta, c, N-5, G_s, degrees, n)
  higher_search <- higher_like > lower_like
  if(higher_search){
    N <- N+5
  }
  else{
    N <- N-5
  }
  go <- T
  i <- 0 
  while (go){
    if(higher_search){
      new_higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
      if (new_higher_like > higher_like){
        N <- N+5
        higher_like <- new_higher_like
      }
      else{
        return(N+5)
      }
    }
    else{
      new_lower_like <- regularized_like_N(alpha, beta, c, N-10, G_s, degrees, n)
      if (new_lower_like > lower_like){
        N <- N-5
        lower_like <- new_lower_like
      }
      else{
        return(N-5)
      }
    }
  }
  
  i <- i +1 
  
  if (i > 5000){
    print("exceeded")
    return(N)
  }
}


block_model <- F #set to produce Simulation 1
rds_results <- readRDS(paste0('rds_', taskID, '.rds'))
rds <- rds_results$rds

if(block_model){
  N <- 5000
  prob <- in_prob
  epsilon <- as.double(block_params[1])
  prop <- as.double(block_params[2])
  in_prob <- rds_results$in_prob
  
}else{
  epsilon <- NA
  prop <- NA
  N<- as.double(pop_params[2])
  prob <- as.double(pop_params[1]/pop_params[2])
  
}

seeds <- rds$seeds
components <- Likelihood_components(rds, rds$true_network)
s_vector <- components$s
print("start")
print(s_vector)
print(seeds)
times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
rds$times <- c(rep(0, length(seeds)),times)
print(sum(rds$true_network))

#MLE OF LAMBDA OF TRUE GRAPH
best_mle_know <-as.numeric(mle_lambda(rds, rds$true_network, seeds))

best_likelihood_know <- log_likelihood(rds, rds$true_network, best_mle_know, seeds)

print(best_mle_know)



#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, rds$network_recruited, seeds)
current_log_likelihood <- log_likelihood(rds, rds$network_recruited, mle, seeds)

for(i in 1:5000){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_mle <- mle_lambda(rds, proposal, seeds)
  proposal_likelihood <- log_likelihood(rds, proposal, proposal_mle, seeds)
  
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    mle <- proposal_mle
  } 
}

given_mle <- as.numeric(mle)
given_edges <- as.numeric(sum(current_sample))
original_G_S_est <- current_sample
                          
print("given_mle")
print(given_mle)
print("given_edges")
print(given_edges)


true_times <- rds$times


lambda_seq<- seq(given_mle - 0.6 , given_mle + 0.6 , by = 0.1)
bias_vec <- numeric(length(lambda_seq))
mle_vec <- numeric(length(lambda_seq))

for(j in 1:length(lambda_seq)){
  
  #generate expected value of lambda MLE with set lambda^k
  static_lambda <- lambda_seq[j]
  
  q <- 25
  mle_bias_vec <- numeric(q)
  

  for(k in 1:q){
    
    #generate most likely subgraph with set lambda^k
    rds$times <- true_times
    
    current_sample <- rds$network_recruited
    current_log_likelihood <- log_likelihood(rds, current_sample, static_lambda, seeds)
    
    #hill climb to find optimal G_S
    for(i in 1:5000){
      
      #evaluate new proposal with static mle
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      proposal_likelihood <- log_likelihood(rds, proposal, static_lambda, seeds)
      
      #determine whether new graph is better for static mle
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
      } 
    }
    
    given_G_S <- current_sample
    print(sum(given_G_S))
    
    s_vector <- Likelihood_components(rds, given_G_S)$s
    times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
    rds$times <- c(rep(0, length(seeds)),times)
    
    #initialize parameters
    current_sample <- rds$network_recruited
    mle <- mle_lambda(rds, rds$network_recruited, seeds)
    current_log_likelihood <- log_likelihood(rds, rds$network_recruited, mle, seeds)
    
    for(i in 1:5000){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_mle <- mle_lambda(rds, proposal, seeds)
      proposal_likelihood <- log_likelihood(rds, proposal, proposal_mle, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb is proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        mle <- proposal_mle
      } 
    }
    mle_bias_vec[k] <- mle
    print(mle)
    

    rds$times <- true_times
  }
  mle <- mean(mle_bias_vec)
  print(static_lambda)
  print(mle)
  #calculate the bias
  bias <- mle - static_lambda
  bias_vec[j] <- as.numeric(bias)
  mle_vec[j] <- as.numeric(mle)
}

distances <- abs(mle_vec - given_mle)

true_lambda_est <- lambda_seq[which(min(distances) == distances)]


#final estimate of G_S based on the IIE of lambda

#estimate G_S

current_sample <- rds$network_recruited
current_log_likelihood <- log_likelihood(rds, current_sample, true_lambda_est, seeds)

#hill climb to find optimal G_S
for(i in 1:7000){
  
  #evaluate new proposal with static mle
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  proposal_likelihood <- log_likelihood(rds, proposal, true_lambda_est, seeds)
  
  #determine whether new graph is better for static mle
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
  } 
}

true_G_S_est <- current_sample
true_edges_est <- as.numeric(sum(true_G_S_est))

#accuracy metrics 

true_graph <- rds$true_network

rec_num <- dim(true_graph)[1]


#accuracy metrics for first estimate original_G_S_est
estimated_subgraph <- original_G_S_est
Accuracy_O <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_O <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_O <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)

#accuracy metrics for real estimate true_G_S_est
estimated_subgraph <- true_G_S_est
Accuracy_T <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_T <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_T <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)



p <- prob
rds$times <- true_times

#different regularization settings
alpha_vec <- c(0.01,0.1, 1,  10, 100, 500, 1000)
beta_vec <- alpha_vec*(1-prob)/prob
alpha_vec <- c(1, alpha_vec)
beta_vec <- c(1, beta_vec)
c <- 0

#estimate population size
print(given_edges)
degrees <- rds$degrees[rds$recruited]
true_graph <- rds$true_network
n <- as.numeric(dim(true_graph)[1])

results <-  matrix(nrow = 0, ncol = 25)
print(alpha_vec)
print(beta_vec)
for (i in 1:length(alpha_vec)){

  #establish regularization
  alpha <- alpha_vec[i]
  beta <- beta_vec[i]
  
  #the regularized mle given the true subgraph
  full_true_est_N <- regularized_mle_N(alpha, beta, c, true_graph, degrees, n)
  
  
  #the regularized mle given the MLE subgraph
  full_Craw_est_N <- regularized_mle_N(alpha, beta, c, original_G_S_est, degrees, n)
  
  #the regularized mle given the IIE subgraph
  full_better_est_N <- regularized_mle_N(alpha, beta, c, true_G_S_est, degrees, n)
  
  true_edges <- as.numeric(sum(rds$true_network))

  results <- rbind(results, c(prop, epsilon, N, prob*N, best_mle_know, true_lambda_est, given_mle,
               prob_T,
               true_edges, true_edges_est, given_edges,
               Accuracy_T, Accuracy_O,
               TPR_T, TPR_O,
               TNR_T, TNR_O,
               alpha, beta,
               full_true_est_N, (full_true_est_N - N)^2,
               full_Craw_est_N, (full_Craw_est_N - N)^2, 
               full_better_est_N, (full_better_est_N - N)^2
  ))
}

results_frame <- data.frame(results)


colnames(results_frame) <- c("sb_prop", "epsilon", 
                             "N", "Np", 
                             "best MLE Gs Known", "Alt Lambda Est",
                             "Crawford Lambda Est",
                             "True Prob",
                             "Es Size", "Alt Es Size", "Crawford Es Size",
                             "Alt Accuracy", "Crawford Accuracy",
                             "Alt TPR", "Crawford TPR",
                             "Alt TNR", "Crawford TNR",
                             "alpha", "beta",
                             "Full Perf N Est", "Full Perf N MSE",
                             "Full Crawford N est", "Full Crawford N MSE",
                             "Full Alt N Est", "Full Alt N MSE")


saveRDS(results_frame, paste0('biasLambda_', taskID, '.rds')) 
